# -*- coding: utf-8 -*-
"""ElasticNet.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1i_8qBC2wRxYvzJPOckKZQ9iEvAgvpjAN
"""

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# 1. Build a synthetic regression problem that mixes informative features,
#    pure-noise features, and a pair of strongly correlated features.
np.random.seed(42)  # fixed seed so every run draws the same data
n_samples = 100
n_relevant_features = 3
n_irrelevant_features = 5
n_correlated_features = 2  # the final two columns are made collinear below
n_total_features = sum(
    (n_relevant_features, n_irrelevant_features, n_correlated_features)
)

# Every column starts out as independent uniform [0, 1) draws.
X = np.random.rand(n_samples, n_total_features)

# Overwrite the last two columns so that they carry largely redundant signal:
#   second-to-last = 0.7 * feature 0 + a small uniform jitter
#   last           = 0.9 * second-to-last + an even smaller jitter
X[:, -2] = 0.7 * X[:, 0] + 0.3 * np.random.rand(n_samples)
X[:, -1] = 0.9 * X[:, -2] + 0.1 * np.random.rand(n_samples)

# Ground-truth weights: three informative features, zeros for the irrelevant
# block, and non-zero weights on the two correlated columns.
true_coefficients = np.concatenate(
    (
        [2.5, -1.0, 3.0],
        np.zeros(n_irrelevant_features),
        [1.5, 1.2],
    )
)

# Target = linear signal plus Gaussian noise with standard deviation 0.5.
y = X.dot(true_coefficients) + 0.5 * np.random.randn(n_samples)

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split reproducible.
(X_train, X_test, y_train, y_test) = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardize the features. The penalties act on coefficient magnitudes, so
# the features should share a common scale. The scaler is fitted on the
# training rows only and then reused unchanged for the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 2. Fit Elastic Net, tuning its hyperparameters by cross-validation

# Elastic Net is controlled by two hyperparameters:
#   alpha    - overall strength of the combined penalty
#   l1_ratio - share of the penalty assigned to the L1 term
#     l1_ratio = 1: the penalty is purely L1, i.e. the Lasso objective
#     l1_ratio = 0: the penalty is purely L2 (Ridge-style shrinkage). Note that
#                   scikit-learn's ElasticNet and Ridge scale their loss terms
#                   differently, so the same numeric alpha does not mean the
#                   same amount of regularization in both estimators.

# Candidate values explored by the grid search.
param_grid = {
    'alpha': np.logspace(start=-4, stop=0, num=5),  # 0.0001 ... 1
    'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99, 1.0],
}

# A generous max_iter gives coordinate descent room to converge.
elastic_net = ElasticNet(max_iter=10000, random_state=42)
grid_search = GridSearchCV(
    estimator=elastic_net,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train_scaled, y_train)

# Evaluate the winning configuration on the held-out rows.
best_elastic_net = grid_search.best_estimator_
y_pred_elastic = best_elastic_net.predict(X_test_scaled)

mse_elastic = mean_squared_error(y_true=y_test, y_pred=y_pred_elastic)
r2_elastic = r2_score(y_true=y_test, y_pred=y_pred_elastic)

elastic_report = (
    "--- Elastic Net Regression (with GridSearchCV) ---",
    f"Best Alpha: {best_elastic_net.alpha}",
    f"Best L1 Ratio: {best_elastic_net.l1_ratio}",
    f"Elastic Net Coefficients (rounded to 3 decimal places): {np.round(best_elastic_net.coef_, 3)}",
    f"Elastic Net Intercept: {best_elastic_net.intercept_:.4f}",
    f"Elastic Net Mean Squared Error: {mse_elastic:.4f}",
    f"Elastic Net R-squared: {r2_elastic:.4f}",
    f"Number of non-zero coefficients: {np.count_nonzero(best_elastic_net.coef_)}",
)
print("\n".join(elastic_report))

# 3. Unregularized baseline: ordinary least squares on the same scaled data
print("\n--- Linear Regression (OLS) ---")
linear_model = LinearRegression().fit(X_train_scaled, y_train)
y_pred_linear = linear_model.predict(X_test_scaled)

# Same held-out metrics as for the regularized models.
mse_linear = mean_squared_error(y_true=y_test, y_pred=y_pred_linear)
r2_linear = r2_score(y_true=y_test, y_pred=y_pred_linear)

linear_report = (
    f"Linear Regression Coefficients (rounded to 3 decimal places): {np.round(linear_model.coef_, 3)}",
    f"Linear Regression Intercept: {linear_model.intercept_:.4f}",
    f"Linear Regression Mean Squared Error: {mse_linear:.4f}",
    f"Linear Regression R-squared: {r2_linear:.4f}",
)
print("\n".join(linear_report))

# 4. Lasso reference model, to show how a pure L1 penalty zeroes coefficients
print("\n--- Lasso Regression (for comparison) ---")
# Reuse the alpha selected for Elastic Net; Lasso corresponds to l1_ratio=1.
lasso_model = Lasso(
    alpha=best_elastic_net.alpha,
    max_iter=10000,
    random_state=42,
).fit(X_train_scaled, y_train)
y_pred_lasso = lasso_model.predict(X_test_scaled)

mse_lasso = mean_squared_error(y_true=y_test, y_pred=y_pred_lasso)
r2_lasso = r2_score(y_true=y_test, y_pred=y_pred_lasso)

lasso_report = (
    f"Lasso Coefficients (rounded to 3 decimal places): {np.round(lasso_model.coef_, 3)}",
    f"Lasso Intercept: {lasso_model.intercept_:.4f}",
    f"Lasso Mean Squared Error: {mse_lasso:.4f}",
    f"Lasso R-squared: {r2_lasso:.4f}",
    f"Number of non-zero coefficients: {np.count_nonzero(lasso_model.coef_)}",
)
print("\n".join(lasso_report))

# 5. Compare with Ridge Regression (for coefficient shrinkage)
print("\n--- Ridge Regression (for comparison) ---")
# Goal: the Ridge model that corresponds to Elastic Net with l1_ratio=0 at the
# alpha selected by the grid search.
#
# The two estimators scale their objectives differently in scikit-learn:
#   ElasticNet: 1 / (2 * n_samples) * ||y - Xw||^2 + alpha * l1_ratio * ||w||_1
#               + 0.5 * alpha * (1 - l1_ratio) * ||w||^2
#   Ridge:      ||y - Xw||^2 + alpha * ||w||^2
# Setting l1_ratio=0 and multiplying the ElasticNet objective by 2 * n_samples
# gives ||y - Xw||^2 + (alpha * n_samples) * ||w||^2, so the matching Ridge
# penalty is alpha * n_samples. Passing the Elastic Net alpha unchanged would
# regularize n_samples times more weakly and behave almost like plain OLS.
ridge_alpha = best_elastic_net.alpha * X_train_scaled.shape[0]
ridge_model = Ridge(alpha=ridge_alpha, random_state=42)
ridge_model.fit(X_train_scaled, y_train)
y_pred_ridge = ridge_model.predict(X_test_scaled)

# Held-out metrics, computed the same way as for the other models.
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)

print(f"Ridge Coefficients (rounded to 3 decimal places): {np.round(ridge_model.coef_, 3)}")
print(f"Ridge Intercept: {ridge_model.intercept_:.4f}")
print(f"Ridge Mean Squared Error: {mse_ridge:.4f}")
print(f"Ridge R-squared: {r2_ridge:.4f}")
# A pure L2 penalty shrinks coefficients but does not set them exactly to
# zero, so this count is expected to equal the number of features.
print(f"Number of non-zero coefficients: {np.sum(ridge_model.coef_ != 0)}")


# Optional: show how the fitted coefficients move as the penalty mix changes.
# Sweeping both alpha and l1_ratio at once is harder to visualize, so alpha is
# pinned to the cross-validated optimum and only l1_ratio is varied.
alpha_for_plot = best_elastic_net.alpha  # could also be a hand-picked value such as 0.1
# Start slightly above 0: an l1_ratio of exactly 0.0 can be problematic for
# some solvers.
l1_ratios_plot = np.linspace(0.01, 1.0, 50)

# One refit per l1_ratio; each entry holds that model's coefficient vector.
coefs_elastic_net_l1_variation = [
    ElasticNet(
        alpha=alpha_for_plot,
        l1_ratio=ratio,
        max_iter=10000,
        random_state=42,
    ).fit(X_train_scaled, y_train).coef_
    for ratio in l1_ratios_plot
]

# One line per feature, plotted against the l1_ratio grid.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(l1_ratios_plot, coefs_elastic_net_l1_variation)
ax.set_xlabel("L1 Ratio (0=Ridge, 1=Lasso)")
ax.set_ylabel("Coefficients")
ax.set_title(f"Elastic Net Coefficients as a Function of L1 Ratio (alpha={alpha_for_plot:.4f})")
# Place the legend outside the axes, to the right of the plot area.
ax.legend(
    [f"Feature {i}" for i in range(n_total_features)],
    loc='center left',
    bbox_to_anchor=(1, 0.5),
)
ax.axis("tight")
plt.show()